if (!dir.exists("Simulation_studies")) dir.create("Simulation_studies")
if (!dir.exists("Simulation_studies/Batch_size")) dir.create("Simulation_studies/Batch_size")
if (!dir.exists("Simulation_studies/Pop_heterogeneity")) dir.create("Simulation_studies/Pop_heterogeneity")
RUN_ALL <- FALSE # flag whether or not to re-run fitting
# source code for model fitting
source("Model_calibration_code/simulate_clinical_recurrences.R")
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
source("Model_calibration_code/Metropolis_Hastings_fit.R")
# parameters under which data are simulated
PARAM_TRUTH_HET <- expand.grid(LAMBDA_MEAN_TRUTH=c(0.25/365, 0.5/365),
NU_TRUTH=6,
ETA_TRUTH=c(1/400, 1/200, 1/100),
KAPPA_TRUTH=c(0.245, 1.15, 9.1, Inf))
PARAM_TRUTH_HET_SUPP <- expand.grid(LAMBDA_MEAN_TRUTH=c(0.25/365, 0.5/365),
NU_TRUTH=6,
ETA_TRUTH=c(1/400, 1/200, 1/100),
KAPPA_TRUTH=2.485)
PARAM_TRUTH_HET <- rbind(PARAM_TRUTH_HET, PARAM_TRUTH_HET_SUPP)
PARAM_TRUTH_BATCH <- expand.grid(LAMBDA_MEAN_TRUTH=c(0.5/365),
NU_TRUTH=c(4, 6, 8),
ETA_TRUTH=c(1/400, 1/200, 1/100),
R_TRUTH=c(0.25, 0.5, 1, 2))
PREL <- 0.4
N_OBS <- 65
TIME_STEP <- 10
PROPHYLAXIS_WINDOW <- 1
BUNCHING_WINDOW <- 2
N_AGES <- rep((2:15)*(365%/%TIME_STEP), each=80)
PCLIN <- rep(1, length(N_AGES))
SEASONALITY <- rep(1, N_OBS+max(N_AGES))
MAX_INF <- 12
# parameters for Metropolis-Hastings proposal distribution
LAMBDA_PROP_SD <- 0.02/365
NU_PROP_SD <- 0.2
ETA_PROP_SD <- 1/2000
# MCMC parametrers
N_CHAINS <- 4
N_ITER <- 32000
N_CORES_PER_CHAIN <- 3
BURNIN_PROP <- 0.5
We simulate data under a Gamma-distributed force of inoculation to accommodate population heterogeneity; but perform inference under a model which assumes a homogeneous force of inoculation.
simulate_cohort_het <- function(LAMBDA_MEAN, KAPPA, SEASONALITY, TIME_STEP, N_AGES, N_OBS, NU, R, ETA, PREL, U, SHIFT_AGE) {
t(sapply(N_AGES, function(N_AGE) {
LAMBDA_SCALE <- ifelse(is.infinite(KAPPA), LAMBDA_MEAN, rgamma(1, shape=KAPPA, scale=LAMBDA_MEAN/KAPPA))
LAMBDA <- LAMBDA_SCALE*SEASONALITY
LAMBDA_ADJUSTED <- c(head(LAMBDA, -(N_OBS+N_AGE-SHIFT_AGE))*U, tail(LAMBDA, N_OBS+N_AGE-SHIFT_AGE))
inf <- simulate_patient(LAMBDA_ADJUSTED, TIME_STEP, N_AGE, N_OBS, NU, R, ETA, PREL) - N_AGE*TIME_STEP
inf <- (inf[inf>=0 & inf<=N_OBS*TIME_STEP])%/%TIME_STEP+1
y <- rep(0, N_OBS); y[inf] <- 1; return(y)}))
}
simulate_dataset_het <- function(LAMBDA_MEAN, KAPPA, SEASONALITY, TIME_STEP, N_AGES, N_OBS, NU, R, ETA, PREL,
PROPHYLAXIS_WINDOW, BUNCHING_WINDOW, PCLIN, MASKING_MATRIX, U, SHIFT_AGE) {
all_inf <- simulate_cohort_het(LAMBDA_MEAN, KAPPA, SEASONALITY, TIME_STEP, N_AGES, N_OBS, NU, R, ETA, PREL, U, SHIFT_AGE)
inf_matrix <- simulate_masking(all_inf, PCLIN, PROPHYLAXIS_WINDOW, BUNCHING_WINDOW, MASKING_MATRIX)
return(inf_matrix)
}
set.seed("08042024")
pop_het <- list()
for (k in 1:nrow(PARAM_TRUTH_HET)) {
if (RUN_ALL | !file.exists(paste0("Simulation_studies/Pop_heterogeneity/pop_het_", k, ".rds"))) {
# simulate recurrences using direct stochastic simulation
inf_matrix <- simulate_cohort_het(PARAM_TRUTH_HET[k, "LAMBDA_MEAN_TRUTH"],
PARAM_TRUTH_HET[k, "KAPPA_TRUTH"],
SEASONALITY, TIME_STEP, N_AGES, N_OBS,
PARAM_TRUTH_HET[k, "NU_TRUTH"], 1,
PARAM_TRUTH_HET[k, "ETA_TRUTH"], PREL, U=1, SHIFT_AGE=1)
# retain up to MAX_INF infections per individual
for (i in 1:nrow(inf_matrix)) {
y <- cumsum(inf_matrix[i,])
inf_matrix[i, y>MAX_INF] <- NA
}
# generate a ternary infection matrix adjusted for post-exposure prophylaxis
ternary_matrix <- matrix("M", nrow=nrow(inf_matrix), ncol=ncol(inf_matrix))
ternary_matrix[inf_matrix==0] <- "N"
ternary_matrix[inf_matrix==1] <- "C"
for (i in 1:nrow(inf_matrix)) {
inf_times <- which(inf_matrix[i,]==1)
masked_times <- which(is.na(inf_matrix[i,]))
if (PROPHYLAXIS_WINDOW>0) {
for (j in 1:ncol(inf_matrix)) {
if (!is.na(inf_matrix[i, j]) & inf_matrix[i, j]==1) {
# period of complete prophylactic protection
inf_matrix[i, subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- NA
ternary_matrix[i, subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- "M"
# prophylactic bunching period
check <- any(inf_matrix[i, subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW),
j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))]==1)
check <- ifelse(is.na(check), FALSE, check)
if (check) {
inf_matrix[i, subset(j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW,
j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW<=ncol(inf_matrix))] <- 1
inf_matrix[i, subset(j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1)),
j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1))<=ncol(inf_matrix))] <- 0
ternary_matrix[i, subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW),
j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))] <- "B"
}
}
}
}
ternary_matrix[i, masked_times] <- "M"
}
# generate S vectors
S_vectors <- generate_S_vectors(split(ternary_matrix, 1:nrow(ternary_matrix)))
# https://stackoverflow.com/questions/8413188/can-i-nest-parallelparlapply
# parallelise over chains
cl <- makeCluster(N_CHAINS)
# export objects from local function environment
clusterExport(cl, c('S_vectors', 'N_AGES', 'PREL', 'N_OBS', 'TIME_STEP',
'SEASONALITY', 'N_ITER', 'N_CORES_PER_CHAIN',
'LAMBDA_PROP_SD', 'NU_PROP_SD', 'ETA_PROP_SD'),
envir=environment())
MCMC_results <- parLapply(cl, 1:length(cl), function(CHAIN) {
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
param_vals <- data.frame(matrix(ncol = 6, nrow = N_ITER))
colnames(param_vals) <- c("CHAIN", "ITER", "LAMBDA", "NU", "ETA", "LOG_LIK")
param_vals[, "CHAIN"] <- CHAIN
param_vals[, "ITER"] <- 1:N_ITER
# initialise parameter values
param_vals[1, "LAMBDA"] <- runif(1, 0.1/365, 1/365)
param_vals[1, "NU"] <- runif(1, 0.5, 8)
param_vals[1, "ETA"] <- runif(1, 1/500, 1/50)
param_vals[1, "LOG_LIK"] <-
cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES,
1, SEASONALITY*param_vals[1, "LAMBDA"],
param_vals[1, "ETA"], param_vals[1, "NU"], PREL,
N_OBS, TIME_STEP, 1, 1, N_CORES_PER_CHAIN)
for (i in 2:N_ITER) {
# sample parameter values from proposal distrbution
lambda_prop <- rnorm(1, param_vals[i-1, "LAMBDA"], LAMBDA_PROP_SD)
if (lambda_prop<0) lambda_prop <- param_vals[i-1, "LAMBDA"]
nu_prop <- rnorm(1, param_vals[i-1, "NU"], NU_PROP_SD)
if (nu_prop<0) nu_prop <- param_vals[i-1, "NU"]
eta_prop <- rnorm(1, param_vals[i-1, "ETA"], ETA_PROP_SD)
if (eta_prop<0) eta_prop <- param_vals[i-1, "ETA"]
# calculate likelihood ratio
likelihood_prop <-
cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES,
1, SEASONALITY*lambda_prop,
eta_prop, nu_prop, PREL, N_OBS, TIME_STEP,
1, 1, N_CORES_PER_CHAIN)
likelihood_ratio <- exp(likelihood_prop-param_vals[i-1, "LOG_LIK"])
# accept or reject
if (runif(1)<=likelihood_ratio) {
param_vals[i, "LAMBDA"] <- lambda_prop
param_vals[i, "NU"] <- nu_prop
param_vals[i, "ETA"] <- eta_prop
param_vals[i, "LOG_LIK"] <- likelihood_prop
} else {
param_vals[i, 3:6] <- param_vals[i-1, 3:6]
}
}
return(param_vals)
})
collated_results <- list(MCMC_results=bind_rows(MCMC_results),
LAMBDA_MEAN_TRUTH=PARAM_TRUTH_HET[k, "LAMBDA_MEAN_TRUTH"],
NU_TRUTH=PARAM_TRUTH_HET[k, "NU_TRUTH"],
KAPPA_TRUTH=PARAM_TRUTH_HET[k, "KAPPA_TRUTH"],
ETA_TRUTH=PARAM_TRUTH_HET[k, "ETA_TRUTH"],
ternary_matrix=ternary_matrix)
write_rds(collated_results,
paste0("Simulation_studies/Pop_heterogeneity/pop_het_", k, ".rds"),
compress="gz")
} else {
collated_results <- read_rds(paste0("Simulation_studies/Pop_heterogeneity/pop_het_", k, ".rds"))
}
pop_het[[k]] <- collated_results
}
## $`000684931506849315`
## NULL
##
## $`00136986301369863`
## NULL
## $`000684931506849315`
## NULL
##
## $`00136986301369863`
## NULL
Based on the incidence of symptomatic falciparum malaria (adjusted for treatment failure and left/right censoring), we estimate the degree of transmission heterogeneity in the SPf66 vaccine trial. We pool data across age groups, given there is appears to be no statistically meaningful age structure in the incidence of symptomatic falciparum malaria.
incidence_by_indiv <- read_rds("Spf66_data_processed/incidence_by_individual.rds")
N_ITER_HET <- 8000
LOG_KAPPA_PRIOR_MEAN=0.5
LOG_KAPPA_PRIOR_SD=1
LOG_KAPPA_PROP_SD=0.1
proportion_bites_top_20 <- function(k) {
0.2+dgamma(qgamma(0.8, shape=k, scale=1/k), shape=k+1, scale=1/k)/k
}
negbin_lhood <- function(lambda, kappa) {
sum(mapply(function(N, t) dnbinom(N, size=kappa, mu=lambda*t, log=TRUE),
incidence_by_indiv$N_Pf, incidence_by_indiv$Followup_Pf))
}
set.seed("01052024")
if (RUN_ALL | !file.exists("Simulation_studies/Pop_heterogeneity/SPf66_Pf_het_fits.rds")) {
Pf_het_fit <- data.frame(matrix(ncol = 4, nrow = 0))
colnames(Pf_het_fit) <- c("CHAIN", "ITER", "LAMBDA", "LOG_KAPPA")
for (chain in 1:N_CHAINS) {
# initialise
param_vals <- data.frame(matrix(ncol = 3, nrow = N_ITER_HET))
colnames(param_vals) <- c("LAMBDA", "LOG_KAPPA", "LOG_LIK")
param_vals[1, "LAMBDA"] <- runif(1, 0.2, 4)
param_vals[1, "LOG_KAPPA"] <- rnorm(1, LOG_KAPPA_PRIOR_MEAN, LOG_KAPPA_PRIOR_SD)
param_vals[1, "LOG_LIK"] <- negbin_lhood(param_vals[1, "LAMBDA"], exp(param_vals[1, "LOG_KAPPA"]))
for (i in 2:N_ITER_HET) {
# sample parameter values from proposal distrbution
lambda_prop <- rnorm(1, param_vals[i-1, "LAMBDA"], LAMBDA_PROP_SD*365)
if (lambda_prop<0) lambda_prop <- param_vals[i-1, "LAMBDA"]
kappa_prop <- rnorm(1, param_vals[i-1, "LOG_KAPPA"], LOG_KAPPA_PROP_SD)
# calculate likelihood ratio
likelihood_prop <- negbin_lhood(lambda_prop, exp(kappa_prop))
likelihood_ratio <- exp(likelihood_prop-param_vals[i-1, "LOG_LIK"])*
dnorm(kappa_prop, LOG_KAPPA_PRIOR_MEAN, LOG_KAPPA_PRIOR_SD)/
dnorm(param_vals[i-1, "LOG_KAPPA"], LOG_KAPPA_PRIOR_MEAN, LOG_KAPPA_PRIOR_SD)
# accept or reject
if (runif(1)<=likelihood_ratio) {
param_vals[i, "LAMBDA"] <- lambda_prop
param_vals[i, "LOG_KAPPA"] <- kappa_prop
param_vals[i, "LOG_LIK"] <- likelihood_prop
} else {
param_vals[i, ] <- param_vals[i-1,]
}
}
Pf_het_fit <- bind_rows(Pf_het_fit, bind_cols(ITER=1:N_ITER_HET, CHAIN=chain, param_vals))
}
write_rds(Pf_het_fit,
"Simulation_studies/Pop_heterogeneity/SPf66_Pf_het_fits.rds",
compress="gz")
} else {
Pf_het_fit <- read_rds("Simulation_studies/Pop_heterogeneity/SPf66_Pf_het_fits.rds")
}
The assumption of geometrically-distributed batch sizes may be misspecified. We therefore perform a simulation study, whereby data are simulated under negative binomial sporozoite batch sizes (which may be under- or over-dispersed relative to the geometric distribution), but inference is performed under a misspecified framework predicated on geometric batch sizes.
set.seed("11122023")
batch_sensitivity <- list()
for (k in 1:nrow(PARAM_TRUTH_BATCH)) {
if (RUN_ALL | !file.exists(paste0("Simulation_studies/Batch_size/batch_sensitivity_", k, ".rds"))) {
# simulate recurrences using direct stochastic simulation
inf_matrix <- simulate_cohort(SEASONALITY*PARAM_TRUTH_BATCH[k, "LAMBDA_MEAN_TRUTH"],
TIME_STEP, N_AGES, N_OBS,
PARAM_TRUTH_BATCH[k, "NU_TRUTH"], PARAM_TRUTH_BATCH[k, "R_TRUTH"],
PARAM_TRUTH_BATCH[k, "ETA_TRUTH"], PREL, U=1, SHIFT_AGE=1)
# retain up to MAX_INF infections per individual
for (i in 1:nrow(inf_matrix)) {
y <- cumsum(inf_matrix[i,])
inf_matrix[i, y>MAX_INF] <- NA
}
# generate a ternary infection matrix adjusted for post-exposure prophylaxis
ternary_matrix <- matrix("M", nrow=nrow(inf_matrix), ncol=ncol(inf_matrix))
ternary_matrix[inf_matrix==0] <- "N"
ternary_matrix[inf_matrix==1] <- "C"
for (i in 1:nrow(inf_matrix)) {
inf_times <- which(inf_matrix[i,]==1)
masked_times <- which(is.na(inf_matrix[i,]))
if (PROPHYLAXIS_WINDOW>0) {
for (j in 1:ncol(inf_matrix)) {
if (!is.na(inf_matrix[i, j]) & inf_matrix[i, j]==1) {
# period of complete prophylactic protection
inf_matrix[i, subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- NA
ternary_matrix[i, subset(j+(1:PROPHYLAXIS_WINDOW), j+(1:PROPHYLAXIS_WINDOW)<=ncol(inf_matrix))] <- "M"
# prophylactic bunching period
check <- any(inf_matrix[i, subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW),
j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))]==1)
check <- ifelse(is.na(check), FALSE, check)
if (check) {
inf_matrix[i, subset(j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW,
j+PROPHYLAXIS_WINDOW+BUNCHING_WINDOW<=ncol(inf_matrix))] <- 1
inf_matrix[i, subset(j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1)),
j+PROPHYLAXIS_WINDOW+(1:(BUNCHING_WINDOW-1))<=ncol(inf_matrix))] <- 0
ternary_matrix[i, subset(j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW),
j+PROPHYLAXIS_WINDOW+(1:BUNCHING_WINDOW)<=ncol(inf_matrix))] <- "B"
}
}
}
}
ternary_matrix[i, masked_times] <- "M"
}
# generate S vectors
S_vectors <- generate_S_vectors(split(ternary_matrix, 1:nrow(ternary_matrix)))
# https://stackoverflow.com/questions/8413188/can-i-nest-parallelparlapply
# parallelise over chains
cl <- makeCluster(N_CHAINS)
# export objects from local function environment
clusterExport(cl, c('S_vectors', 'N_AGES', 'PREL', 'N_OBS', 'TIME_STEP',
'SEASONALITY', 'N_ITER', 'N_CORES_PER_CHAIN',
'LAMBDA_PROP_SD', 'NU_PROP_SD', 'ETA_PROP_SD'),
envir=environment())
MCMC_results <- parLapply(cl, 1:length(cl), function(CHAIN) {
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
param_vals <- data.frame(matrix(ncol = 6, nrow = N_ITER))
colnames(param_vals) <- c("CHAIN", "ITER", "LAMBDA", "NU", "ETA", "LOG_LIK")
param_vals[, "CHAIN"] <- CHAIN
param_vals[, "ITER"] <- 1:N_ITER
# initialise parameter values
param_vals[1, "LAMBDA"] <- runif(1, 0.1/365, 1/365)
param_vals[1, "NU"] <- runif(1, 0.5, 8)
param_vals[1, "ETA"] <- runif(1, 1/500, 1/50)
param_vals[1, "LOG_LIK"] <-
cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES,
1, SEASONALITY*param_vals[1, "LAMBDA"],
param_vals[1, "ETA"], param_vals[1, "NU"], PREL,
N_OBS, TIME_STEP, 1, 1, N_CORES_PER_CHAIN)
for (i in 2:N_ITER) {
# sample parameter values from proposal distrbution
lambda_prop <- rnorm(1, param_vals[i-1, "LAMBDA"], LAMBDA_PROP_SD)
if (lambda_prop<0) lambda_prop <- param_vals[i-1, "LAMBDA"]
nu_prop <- rnorm(1, param_vals[i-1, "NU"], NU_PROP_SD)
if (nu_prop<0) nu_prop <- param_vals[i-1, "NU"]
eta_prop <- rnorm(1, param_vals[i-1, "ETA"], ETA_PROP_SD)
if (eta_prop<0) eta_prop <- param_vals[i-1, "ETA"]
# calculate likelihood ratio
likelihood_prop <-
cohort_likelihood(S_vectors[["S_plus"]], S_vectors[["S_minus"]], N_AGES,
1, SEASONALITY*lambda_prop,
eta_prop, nu_prop, PREL, N_OBS, TIME_STEP,
1, 1, N_CORES_PER_CHAIN)
likelihood_ratio <- exp(likelihood_prop-param_vals[i-1, "LOG_LIK"])
# accept or reject
if (runif(1)<=likelihood_ratio) {
param_vals[i, "LAMBDA"] <- lambda_prop
param_vals[i, "NU"] <- nu_prop
param_vals[i, "ETA"] <- eta_prop
param_vals[i, "LOG_LIK"] <- likelihood_prop
} else {
param_vals[i, 3:6] <- param_vals[i-1, 3:6]
}
}
return(param_vals)
})
collated_results <- list(MCMC_results=bind_rows(MCMC_results),
LAMBDA_TRUTH=PARAM_TRUTH_BATCH[k, "LAMBDA_MEAN_TRUTH"],
NU_TRUTH=PARAM_TRUTH_BATCH[k, "NU_TRUTH"],
R_TRUTH=PARAM_TRUTH_BATCH[k, "R_TRUTH"],
ETA_TRUTH=PARAM_TRUTH_BATCH[k, "ETA_TRUTH"],
ternary_matrix=ternary_matrix)
write_rds(collated_results,
paste0("Simulation_studies/Batch_size/batch_sensitivity_", k, ".rds"),
compress = "gz")
} else {
collated_results <- read_rds(paste0("Simulation_studies/Batch_size/batch_sensitivity_", k, ".rds"))
}
batch_sensitivity[[k]] <- collated_results
}
## $`4`
## NULL
##
## $`6`
## NULL
##
## $`8`
## NULL
## $`4`
## NULL
##
## $`6`
## NULL
##
## $`8`
## NULL